import requests
import json
import concurrent.futures
import threading
# Chat-completions endpoint used by generate_ans(); the commented-out lines
# are alternative endpoints that can be swapped in.
url = 'http://192.168.110.46:8001/v1/chat/completions'
#url = 'https://cloud.infini-ai.com/maas/v1/chat/completions'
#url = 'https://api.siliconflow.cn/v1/chat/completions'
# Replace with your own API key
api_key = ''

#model = "test"
model = "Qwen2-72B"
file_path = "合成数据决赛赛题.jsonl"
# Serializes appends to the shared result file across worker threads.
write_lock = threading.Lock()
# Eagerly parse the JSONL input: every line becomes one Python dict.
with open(file_path, "r", encoding="utf-8") as f:
    data = [json.loads(line) for line in f]

# System prompt variant that asks the model to put all of its reasoning inside
# <think>...</think> and to emit only the formatted list after the closing tag.
# NOTE(review): generate_ans() in this file sends `prompt2`, not this string;
# confirm whether `prompt` is still needed.
prompt = '''
你是一位专业且有帮助的AI助手,专注于理解并调用API工具来解答用户的问题,你的回答需要科学严谨、严格符合输出格式,具体要求如下:

1.**定义:**
    - **api列表的定义:**你会收到一个api列表,里面存有若干可以调用的api,api的'description'字段表示这个api的功能,'parameters'字段'表示调用时应该填入的参数.
    - **user_messages列表的定义:**user_messages列表表示用户提出的若干需求,这些需求可能包含了需要调用的api的参数.

2.**任务内容:**
    -你需要从头开始逐一分析user_messages列表里的每一个元素
    -首先思考用户的需求需要你调用哪几个api
    -然后思考在第几个元素的时候你获得了调用这些api所需的全部参数
    -最后思考用什么样的正确格式填入参数.

3.**输出格式要求:**
    -你需要输出一个列表,这个列表的长度和user_messages列表相等,表示对于用户的每一句话做出的回应.
    -这个列表的全部元素都是列表,但是只有一个列表是非空列表.
    -这个非空列表出现的位置就是你认为的已经获得足够可以调用若干个api的参数的那句user_messages对应的位置.
    -非空列表中有若干个字典,每个字典的key有两个,分别是"name"表示api名称,以及"arguments"表示api需要填入的参数
    -你可以只调用一个api,或者调用一个api多次,或者调用多个不同的api来回答,但是输出列表只能有一个非空元素表示你回答的那轮对话
    -你需要先思考再回答格式化的输出,输出必须能被python的eval()函数转化,不可以带有中文标点符号.
    -你所有的话一定全部都写在思考内容里,</think>之后只能输出一个格式化的列表,不允许多说别的话!

4.**演示示例:**
    -**输入示例:**
{"id": "827", "apis": [{"name": "query_token_balance", "description": "查询指定地址在特定区块链上的代币余额.", "parameters": {"type": "object", "properties": {"product_id": {"type": "string", "description": "钱包的区块链地址"}, "token_contract_address": {"type": "string", "description": "代币的智能合约地址"}, "network": {"type": "string", "description": "区块链网络名称"}, "include_usd_value": {"type": "boolean", "description": "是否包含以美元计算的余额价值", "default": false}, "decimals": {"type": "nteger", "description": "产品是否激活", "default": true}}, "required": ["wallet_address", "token_contract_address", "network"]}}, {"name": "milk_quality_analysis", "description": "分析牛奶的质量,包括成分和卫生指标", "parameters": {"type": "object", "properties": {"fat_content": {"type": "number", "description": "牛奶中的脂肪含量（百分比）"}, "protein_content": {"type": "number", "description": "牛奶中的蛋白质含量（百分比）"}, "lactose_content": {"type": "number", "description": "牛奶中的乳糖含量（百分比）"}, "somatic_cell_count": {"type": "number", "description": "牛奶中的体细胞数（每毫升）"}, "total_bacterial_count": {"type": "number", "description": "牛奶中的总细菌数（每毫升）"}}, "required": ["fat_content", "protein_content", "lactose_content", "somatic_cell_count", "total_bacterial_count"]}}, {"name": "monitor_machine_health", "description": "监控机器设备的健康状态,减少故障和停机时间", "parameters": {"type": "object", "properties": {"machine_id": {"type": "string", "description": "机器设备的唯一标识符"}, "vibration_levels": {"type": "number", "description": "机器运作时的振动幅度"}, "temperature": {"type": "number", "description": "机器运作时的温度"}, "operating_hours": {"type": "number", "description": "机器已连续运行的小时数"}, "include_downtime_analysis": {"type": "boolean", "description": "是否包括停机分析报告"}}, "required": ["machine_id", "vibration_levels", "temperature", "operating_hours", "include_downtime_analysis"]}}], "user_messages": ["你好", 
"我想查一下钱包地址0x742d35Cc6634C0532925a3b844Bc454e4438f44e在Ethereum网络上,代币合约地址为0x1985365e9f78359a9B6AD760e32412f4a445E862的代币余额,并且需要包含以美元计算的余额价值,代币的小数位数是18.另外,我还想查一下钱包地址bc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq在Bitcoin网络上,代币合约地址为0x6B175474E89094C44Da98b954EedeAC495271d0F的代币余额."]}
    --**输出示例:**
<think>..你的思考过程..</think>
[[],[{"name": "query_token_balance","arguments": {"wallet_address": "0x742d35Cc6634C0532925a3b844Bc454e4438f44e", "token_contract_address": "0x1985365e9f78359a9B6AD760e32412f4a445E862", "network": "Ethereum", "include_usd_value": true, "decimals": 18}}, {"name": "query_token_balance","arguments": {"wallet_address": "bc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq", "token_contract_address": "0x6B175474E89094C44Da98b954EedeAC495271d0F", "network": "Bitcoin"}}]]

5.**以下是我的正式输入,请开始任务:**
'''
# System prompt that generate_ans() sends with every request. It describes the
# same task and worked example as `prompt`, but the example output is the bare
# list (no <think> section) and the instructions additionally stress that the
# output list must be exactly as long as user_messages.
prompt2 = """
你是一位专业且有帮助的AI助手,专注于理解并调用API工具来解答用户的问题,你的回答需要科学严谨、严格符合输出格式,具体要求如下:

1.**定义:**
    - **api列表的定义:**你会收到一个api列表,里面存有若干可以调用的api,api的'description'字段表示这个api的功能,'parameters'字段'表示调用时应该填入的参数.
    - **user_messages列表的定义:**user_messages列表表示用户提出的若干需求,这些需求可能包含了需要调用的api的参数.

2.**任务内容:**
    -你需要从头开始逐一分析user_messages列表里的每一个元素,每一个元素代表一句需求
    -首先思考用户的需求需要你调用哪几个api,对于每一个元素都要认真判断
    -然后思考在第几个元素的时候你获得了调用这些api所需的全部参数
    -最后思考用什么样的正确格式填入参数
    -想象你的输出是对于用户每句话的回复,但是你只在获得了足够调用这些api的全部参数时回复仅仅一次内容,其他时候你回复空列表
    -注意你的整个输出列表长度必须和user_messages列表相等!

3.**输出格式要求:**
    -你需要输出一个列表,这个列表的长度和user_messages列表相等!表示对于用户的每一句话做出的回应.
    -这个列表的全部元素都是列表,但是只有一个列表是非空列表.
    -这个非空列表出现的位置就是你认为的已经获得足够可以调用若干个api的参数的那句user_messages对应的位置.
    -非空列表中有若干个字典,每个字典的key有两个,分别是"name"表示api名称,以及"arguments"表示api需要填入的参数
    -你可以只调用一个api,或者调用一个api多次,或者调用多个不同的api来回答,但是输出列表只能有一个非空元素表示你回答的那轮对话
    -你必须回答格式化的输出,输出必须能被python的eval()函数转化,不可以带有中文标点符号,不可以缺少必要的中括号和大括号!
    
4.**演示示例:**
    -**输入示例:**
{"id": "827", "apis": [{"name": "query_token_balance", "description": "查询指定地址在特定区块链上的代币余额.", "parameters": {"type": "object", "properties": {"product_id": {"type": "string", "description": "钱包的区块链地址"}, "token_contract_address": {"type": "string", "description": "代币的智能合约地址"}, "network": {"type": "string", "description": "区块链网络名称"}, "include_usd_value": {"type": "boolean", "description": "是否包含以美元计算的余额价值", "default": false}, "decimals": {"type": "nteger", "description": "产品是否激活", "default": true}}, "required": ["wallet_address", "token_contract_address", "network"]}}, {"name": "milk_quality_analysis", "description": "分析牛奶的质量,包括成分和卫生指标", "parameters": {"type": "object", "properties": {"fat_content": {"type": "number", "description": "牛奶中的脂肪含量（百分比）"}, "protein_content": {"type": "number", "description": "牛奶中的蛋白质含量（百分比）"}, "lactose_content": {"type": "number", "description": "牛奶中的乳糖含量（百分比）"}, "somatic_cell_count": {"type": "number", "description": "牛奶中的体细胞数（每毫升）"}, "total_bacterial_count": {"type": "number", "description": "牛奶中的总细菌数（每毫升）"}}, "required": ["fat_content", "protein_content", "lactose_content", "somatic_cell_count", "total_bacterial_count"]}}, {"name": "monitor_machine_health", "description": "监控机器设备的健康状态,减少故障和停机时间", "parameters": {"type": "object", "properties": {"machine_id": {"type": "string", "description": "机器设备的唯一标识符"}, "vibration_levels": {"type": "number", "description": "机器运作时的振动幅度"}, "temperature": {"type": "number", "description": "机器运作时的温度"}, "operating_hours": {"type": "number", "description": "机器已连续运行的小时数"}, "include_downtime_analysis": {"type": "boolean", "description": "是否包括停机分析报告"}}, "required": ["machine_id", "vibration_levels", "temperature", "operating_hours", "include_downtime_analysis"]}}], "user_messages": ["你好", 
"我想查一下钱包地址0x742d35Cc6634C0532925a3b844Bc454e4438f44e在Ethereum网络上,代币合约地址为0x1985365e9f78359a9B6AD760e32412f4a445E862的代币余额,并且需要包含以美元计算的余额价值,代币的小数位数是18.另外,我还想查一下钱包地址bc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq在Bitcoin网络上,代币合约地址为0x6B175474E89094C44Da98b954EedeAC495271d0F的代币余额."]}
    --**输出示例:**
[[],[{"name": "query_token_balance","arguments": {"wallet_address": "0x742d35Cc6634C0532925a3b844Bc454e4438f44e", "token_contract_address": "0x1985365e9f78359a9B6AD760e32412f4a445E862", "network": "Ethereum", "include_usd_value": true, "decimals": 18}}, {"name": "query_token_balance","arguments": {"wallet_address": "bc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq", "token_contract_address": "0x6B175474E89094C44Da98b954EedeAC495271d0F", "network": "Bitcoin"}}]]

5.**以下是我的正式输入,请开始任务:**
"""


def generate_ans(text_content):
    """Send one chat-completion request and return the assistant's reply text.

    The request uses the module-level ``url``, ``api_key`` and ``model``, with
    ``prompt2`` as the system message and *text_content* as the user message,
    at temperature 0.

    Args:
        text_content: Text placed in the user message.

    Returns:
        The ``content`` of the first choice's message when the server answers
        with HTTP 200; otherwise ``None`` after printing the status code and
        the raw response body.
    """
    request_headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    messages = [
        {"role": "system", "content": prompt2},
        {"role": "user", "content": text_content},
    ]
    payload = {"model": model, "messages": messages, "temperature": 0}

    # NOTE: verify=False turns off TLS certificate verification for this call.
    response = requests.post(
        url,
        headers=request_headers,
        json=payload,
        verify=False,
        timeout=300,
    )
    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        print(response.content)
        return None
    return response.json()["choices"][0]["message"]['content']
def _extract_answer_text(output):
    """Return the part of a model reply that should hold the answer list.

    Keeps the text following ``</think>`` when that tag is present, then
    removes Markdown code-fence markers and surrounding whitespace.
    """
    if "</think>" in output:
        output = output.split("</think>")[1]
    return output.replace("```json", "").replace("```", "").strip()


def _parse_targets(answer_text):
    """Parse *answer_text* into a list with one sized entry per reply turn.

    Strict JSON is tried first so that ``true``/``false``/``null`` are handled
    by the parser itself. Only when that fails is the legacy path used, which
    textually rewrites those literals and evaluates the text as a Python
    expression; that rewrite also touches matching substrings inside string
    values, which is why it is no longer the first choice.

    Returns:
        A list whose elements all support ``len()`` and which is known to be
        JSON-serializable.

    Raises:
        ValueError: the parsed value is not a list/tuple.
        TypeError: an element has no length, or the value cannot be
            serialized to JSON.
        Exception: anything raised while evaluating malformed text
            (typically ``SyntaxError`` or ``NameError``).
    """
    try:
        parsed = json.loads(answer_text)
    except ValueError:
        legacy_text = (
            answer_text.replace("true", "True")
            .replace("false", "False")
            .replace("null", "None")
        )
        # NOTE(security): eval() executes whatever expression the model
        # returned. It is kept for compatibility with non-JSON replies
        # (single quotes, Python literals); consider ast.literal_eval if the
        # endpoint is not fully trusted.
        parsed = eval(legacy_text)

    # A reply that omits the outer brackets evaluates to a tuple; accept it
    # and normalise to a list.
    if not isinstance(parsed, (list, tuple)):
        raise ValueError(
            f"expected a list of per-turn entries, got {type(parsed).__name__}"
        )
    turns = list(parsed)
    if not all(hasattr(turn, "__len__") for turn in turns):
        raise TypeError("every per-turn entry must be a sized container")
    # Fail here rather than while writing the result line, so an
    # unserializable reply cannot cause the whole record to be dropped.
    json.dumps(turns, ensure_ascii=False)
    return turns


def process_entry(current_data, max_attempts=1):
    """Query the model for one dataset record and append the outcome to disk.

    Args:
        current_data: Record dict; the keys ``id``, ``apis`` and
            ``user_messages`` are read.
        max_attempts: Maximum number of request/parse attempts. The default
            of 1 matches the previous hard-coded behaviour.

    Returns:
        The dict that was written: ``id``, ``targets``, ``apis`` and
        ``user_messages``. ``targets`` is the most recent reply that parsed
        and validated, or ``[]`` if none did. A reply whose entries are all
        empty is kept as ``targets`` but counts as a failed attempt.

    Side effects:
        Prints progress and error messages, and appends one JSON line to
        ``result_0321_sft.jsonl`` while holding ``write_lock``.
    """
    entry_id = current_data['id']
    print(f"Processing ID: {entry_id}")

    targets = []
    attempts = 0
    while attempts < max_attempts:
        attempts += 1
        try:
            # NOTE(review): the record is sent as its Python repr (str(dict)),
            # while the example input in the system prompt is JSON - confirm
            # this is intended before changing it.
            output = generate_ans(str(current_data))
            if not output:
                continue
            answer_text = _extract_answer_text(output)
        except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e:
            print(f"ID {entry_id} 网络错误: {e}")
            continue
        except Exception as e:
            print(f"ID {entry_id} 未知错误: {e}")
            continue

        try:
            turns = _parse_targets(answer_text)
        except Exception as e:
            print(f"ID {entry_id} 解析失败: {e}")
            print(answer_text)
            continue

        # Assign only after validation so a malformed reply can never leak
        # into the result.
        targets = turns
        if any(len(turn) != 0 for turn in turns):
            break  # at least one turn carries API calls: accept this reply

    result = {
        "id": entry_id,
        "targets": targets,
        "apis": current_data['apis'],
        "user_messages": current_data['user_messages']
    }

    # The lock keeps concurrent workers from interleaving their output lines.
    with write_lock:
        with open('result_0321_sft.jsonl', 'a', encoding='utf-8') as f:
            f.write(json.dumps(result, ensure_ascii=False) + "\n")

    return result

if __name__ == "__main__":
    # Load the whole JSONL input up front: one record dict per line.
    dataset = []
    with open(file_path, "r", encoding="utf-8") as f:
        for raw_line in f:
            dataset.append(json.loads(raw_line))

    # Thread pool for the network-bound requests (tune max_workers to the
    # machine and the endpoint).
    with concurrent.futures.ThreadPoolExecutor(max_workers=256) as executor:
        # Queue one task per record.
        futures = []
        for record in dataset:
            futures.append(executor.submit(process_entry, record))

        # Drain tasks as they finish so a failure in one is reported without
        # stopping the rest.
        for finished in concurrent.futures.as_completed(futures):
            try:
                finished.result()
            except Exception as e:
                print(f"任务异常: {e}")

    print("处理完成！")
# with open('result_0318.jsonl', 'a', encoding='utf-8') as out_file:
#     for i in range(len(data)):
#         print(i)
#         current_data = data[i]
#         id = current_data['id']
#         #print(data[i])
#         retry_count = 0
#         output = None
        
#         while retry_count < 10:
#             # 调用 generate_ans 获取 output
#             try:
#                 output = generate_ans(str(current_data))
#             except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e:
#                 print(f"Network error: {e}")
#                 retry_count += 1
#                 break
#             except Exception as e:
#                 print(f"Unexpected error: {e}")
#                 retry_count += 1
#                 break
#             print(output)
#             output1 = []
#             output2 = output
#             if output:  # 如果 output 非空,则继续处理
#                 if "</think>" in output:
#                     output1 = output.split("</think>")[0]
#                     output2 = output.split("</think>")[1]
                
#                 if "```json" in output2:
#                     output2 = output2.split("```json")[1][1:]
#                 if "```" in output2:
#                     output2 = output2.split("```")[0]
#                 output2 = output2.replace("true","True")
#                 output2 = output2.replace("false","False")
#                 output2 = output2.replace("null","None")
#                 try:
#                     output2 = eval(output2)
#                     break  # 如果 eval 成功,则退出循环
#                 except Exception as e:
#                     print(f"Error in eval: {e}",id)
#                     retry_count += 1  # 增加重试次数
#                     if retry_count == 10:
#                         output1 = ""
#                         output2 = []  # 如果重试三次都失败,则将 output 设置为空列表
#                         print(f"重试 {retry_count} 次失败,返回空列表")
#                     else:
#                         print("Retrying...")
#             else:
#                 retry_count += 1 
                

